#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Author:张广勤
@Web site: https://www.tunan.wang
@Github:www.github.com
 
@File:html2txt2_0.py
@Time:2024/9/4 21:54

@Motto:不积跬步无以至千里，不积小流无以成江海！
"""
import html2text

# Load the HTML markup to convert (the input is stored with a .txt suffix).
with open('./gongbao_quanguo/website_content.txt', 'r', encoding='utf-8') as source:
    html_content = source.read()

# Build the converter and switch on the three options this script uses:
# skip images, skip links, and pad table output. Every other html2text
# option (e.g. decode_errors, ignore_tables, escape_snob, bypass_tables)
# is left at whatever HTML2Text() initialises it to.
h = html2text.HTML2Text()
for option in ('ignore_images', 'ignore_links', 'pad_tables'):
    setattr(h, option, True)

# Convert the HTML into plain text.
text = h.handle(html_content)

# Save the converted text to a TXT file in the current working directory.
with open('example.txt', 'w', encoding='utf-8') as sink:
    sink.write(text)